library(brms)
library(rstan)
library(tidyverse)
library(cowplot)
library(colorblindr)
library(ggpubr)
library(tidybayes)
library(modelr)
# recommended code to speed up stan
# see https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started
# Cache compiled Stan models on disk and run one MCMC chain per CPU core.
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
# Maximum evaluation period in years; used to normalize evalPeriod below.
max_eval <- 30

# Read the RQ3 data and rescale evalPeriod (1..30 years) onto roughly
# [-0.467, 0.5]: evalPeriod == 30 maps to 0.5 and evalPeriod == 1 maps to
# ((1 - 30) / 30) + 0.5 = -0.4667 (see the endpoint grid further below).
rq3 <- read_csv("data/rq3.csv") %>%
  mutate(
    eval_period_normalized = ((evalPeriod - max_eval) / max_eval) + 0.5
  )
# Exploratory look at the raw data: mean +/- SE of dv over the evaluation
# period, one panel per treatment, with a linear trend per panel and a
# reference line at dv = 1.
ggplot(rq3, aes(x = evalPeriod, y = dv, color = treatment)) +
  stat_summary(fun.data = mean_se) +
  geom_hline(yintercept = 1) +
  stat_smooth(method = lm) +
  facet_grid(. ~ treatment) +
  theme(legend.position = "none")
First, inspect the default priors brms would assign to this model:
# Ask brms for the default priors of the full specification: a logit-scale
# submodel for the mean (dv) with a per-participant random intercept, plus
# a distributional (log-scale) submodel for the Beta precision phi.
get_prior(bf(
  dv ~ eval_period_normalized + treatment + eval_period_normalized * treatment + (1|usertoken),
  phi ~ eval_period_normalized * treatment
),
data = rq3, family = Beta)
## prior class coef
## (flat) b
## (flat) b eval_period_normalized
## (flat) b eval_period_normalized:treatmentdensity
## (flat) b eval_period_normalized:treatmentdotplot
## (flat) b eval_period_normalized:treatmenthops
## (flat) b eval_period_normalized:treatmenthopsdist
## (flat) b eval_period_normalized:treatmentinterval
## (flat) b eval_period_normalized:treatmentpoint
## (flat) b eval_period_normalized:treatmenttable
## (flat) b treatmentdensity
## (flat) b treatmentdotplot
## (flat) b treatmenthops
## (flat) b treatmenthopsdist
## (flat) b treatmentinterval
## (flat) b treatmentpoint
## (flat) b treatmenttable
## student_t(3, 0, 2.5) Intercept
## student_t(3, 0, 2.5) sd
## student_t(3, 0, 2.5) sd
## student_t(3, 0, 2.5) sd Intercept
## (flat) b
## (flat) b eval_period_normalized
## (flat) b eval_period_normalized:treatmentdensity
## (flat) b eval_period_normalized:treatmentdotplot
## (flat) b eval_period_normalized:treatmenthops
## (flat) b eval_period_normalized:treatmenthopsdist
## (flat) b eval_period_normalized:treatmentinterval
## (flat) b eval_period_normalized:treatmentpoint
## (flat) b eval_period_normalized:treatmenttable
## (flat) b treatmentdensity
## (flat) b treatmentdotplot
## (flat) b treatmenthops
## (flat) b treatmenthopsdist
## (flat) b treatmentinterval
## (flat) b treatmentpoint
## (flat) b treatmenttable
## student_t(3, 0, 2.5) Intercept
## group resp dpar nlpar bound source
## default
## (vectorized)
## (vectorized)
## (vectorized)
## (vectorized)
## (vectorized)
## (vectorized)
## (vectorized)
## (vectorized)
## (vectorized)
## (vectorized)
## (vectorized)
## (vectorized)
## (vectorized)
## (vectorized)
## (vectorized)
## default
## default
## usertoken (vectorized)
## usertoken (vectorized)
## phi (vectorized)
## phi (vectorized)
## phi (vectorized)
## phi (vectorized)
## phi (vectorized)
## phi (vectorized)
## phi (vectorized)
## phi (vectorized)
## phi (vectorized)
## phi (vectorized)
## phi (vectorized)
## phi (vectorized)
## phi (vectorized)
## phi (vectorized)
## phi (vectorized)
## phi (vectorized)
## phi default
# Priors for the Beta regression (replacing the flat defaults listed above).
pr_beta <- c(
  # Weakly-informative slopes on the logit scale for the mean submodel.
  prior(normal(0, 1), class = b),
  # these prior intercepts are wide and cover 0 (50% on the logit scale), but
  # also assume some likely better-than-50% performance on average --- this
  # was chosen to aid convergence during the pilot, but does not have a strong
  # impact on final estimates.
  prior(normal(2, 2), class = Intercept),
  prior(normal(2, 2), class = Intercept, dpar = phi),
  # Weakly-informative slopes on the log scale for the phi submodel.
  prior(normal(0, 1), class = b, dpar = phi),
  # Half-Student-t on the random-intercept SD, tighter than brms' default.
  prior(student_t(3, 0, 1), class = sd)
)
Let’s fit the model:
# Sampler settings: 2000 warmup + 2000 sampling iterations per chain,
# thinned by 2 (so 4 chains x 1000 retained draws = 4000 total).
warmup <- 2000
iter <- warmup + 2000
thin <- 2

# Fit the Beta regression: logit-scale mean with treatment x evaluation
# period interaction and a per-participant random intercept; log-scale
# precision (phi) with the same fixed-effect structure.
mbeta <- brm(bf(
  dv ~ eval_period_normalized + treatment + eval_period_normalized * treatment + (1|usertoken),
  phi ~ eval_period_normalized * treatment),
  data = rq3,
  prior = pr_beta,
  # Conservative sampler controls (high adapt_delta, deep trees, small
  # initial stepsize) to suppress divergent transitions.
  control = list(adapt_delta = 0.9995, max_treedepth = 15, stepsize = 0.005),
  warmup = warmup, iter = iter, thin = thin,
  family = Beta
)
# message https://discourse.mc-stan.org/t/dealing-with-catalina/11285/254
# pairs(mbeta$fit, pars = c("b_Intercept", "b_trial_normalized", "sd_participant__Intercept", "sd_participant__trial_normalized",
# "sd_scenario__Intercept",
# "cor_participant__Intercept__trial_normalized", "b_phi_Intercept", "b_phi_trial_normalized"))
# save model - core model
# dv ~ eval_period_normalized + treatment + eval_period_normalized * treatment + (1|usertoken),
# phi ~ eval_period_normalized * treatment),
# Persist the fitted model so the (slow) sampling step can be skipped on
# re-runs; loading it back immediately keeps fresh runs and cached runs on
# the same code path.
save(mbeta, file = "models/fit1.rda")
# load model
load("models/fit1.rda")
summary(mbeta)
## Family: beta
## Links: mu = logit; phi = log
## Formula: dv ~ eval_period_normalized + treatment + eval_period_normalized * treatment + (1 | usertoken)
## phi ~ eval_period_normalized * treatment
## Data: rq3 (Number of observations: 1393)
## Samples: 4 chains, each with iter = 4000; warmup = 2000; thin = 2;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~usertoken (Number of levels: 199)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.63 0.04 0.55 0.72 1.00 1946 3028
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI
## Intercept 1.85 0.13 1.60
## phi_Intercept 2.62 0.11 2.40
## eval_period_normalized 1.10 0.15 0.80
## treatmentdensity -0.51 0.19 -0.89
## treatmentdotplot -0.38 0.19 -0.73
## treatmenthops -0.40 0.20 -0.80
## treatmenthopsdist -0.31 0.19 -0.68
## treatmentinterval -0.02 0.19 -0.38
## treatmentpoint 0.18 0.19 -0.19
## treatmenttable -0.27 0.19 -0.65
## eval_period_normalized:treatmentdensity -1.10 0.25 -1.60
## eval_period_normalized:treatmentdotplot -0.24 0.26 -0.73
## eval_period_normalized:treatmenthops -0.36 0.26 -0.89
## eval_period_normalized:treatmenthopsdist -0.10 0.24 -0.57
## eval_period_normalized:treatmentinterval -0.16 0.24 -0.63
## eval_period_normalized:treatmentpoint -0.08 0.26 -0.58
## eval_period_normalized:treatmenttable -0.55 0.26 -1.05
## phi_eval_period_normalized 0.19 0.26 -0.33
## phi_treatmentdensity -0.55 0.16 -0.86
## phi_treatmentdotplot -0.86 0.15 -1.14
## phi_treatmenthops -0.65 0.16 -0.97
## phi_treatmenthopsdist -0.34 0.16 -0.66
## phi_treatmentinterval -0.21 0.16 -0.52
## phi_treatmentpoint -0.62 0.16 -0.93
## phi_treatmenttable -0.69 0.16 -1.01
## phi_eval_period_normalized:treatmentdensity -0.20 0.43 -1.00
## phi_eval_period_normalized:treatmentdotplot -0.18 0.40 -0.94
## phi_eval_period_normalized:treatmenthops -0.36 0.43 -1.21
## phi_eval_period_normalized:treatmenthopsdist -0.30 0.43 -1.15
## phi_eval_period_normalized:treatmentinterval -0.02 0.41 -0.83
## phi_eval_period_normalized:treatmentpoint 0.64 0.38 -0.10
## phi_eval_period_normalized:treatmenttable -0.02 0.43 -0.88
## u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.11 1.00 953 1796
## phi_Intercept 2.84 1.00 1993 3304
## eval_period_normalized 1.39 1.00 2046 2732
## treatmentdensity -0.13 1.00 1126 1841
## treatmentdotplot -0.02 1.00 1198 2203
## treatmenthops -0.01 1.00 1270 2416
## treatmenthopsdist 0.07 1.00 1248 2304
## treatmentinterval 0.34 1.00 1101 2130
## treatmentpoint 0.55 1.00 1328 2097
## treatmenttable 0.11 1.00 1390 2195
## eval_period_normalized:treatmentdensity -0.61 1.00 2139 3062
## eval_period_normalized:treatmentdotplot 0.27 1.00 2537 3092
## eval_period_normalized:treatmenthops 0.16 1.00 2644 3168
## eval_period_normalized:treatmenthopsdist 0.36 1.00 2644 2984
## eval_period_normalized:treatmentinterval 0.30 1.00 2291 3127
## eval_period_normalized:treatmentpoint 0.42 1.00 2559 3424
## eval_period_normalized:treatmenttable -0.03 1.00 2363 3181
## phi_eval_period_normalized 0.71 1.00 2094 2700
## phi_treatmentdensity -0.24 1.00 2118 2977
## phi_treatmentdotplot -0.57 1.00 2228 3085
## phi_treatmenthops -0.33 1.00 2310 2934
## phi_treatmenthopsdist -0.03 1.00 2413 3142
## phi_treatmentinterval 0.10 1.00 2330 3118
## phi_treatmentpoint -0.32 1.00 2461 2809
## phi_treatmenttable -0.38 1.00 2502 3227
## phi_eval_period_normalized:treatmentdensity 0.65 1.00 2154 3480
## phi_eval_period_normalized:treatmentdotplot 0.62 1.00 2019 3243
## phi_eval_period_normalized:treatmenthops 0.49 1.00 2457 3217
## phi_eval_period_normalized:treatmenthopsdist 0.51 1.00 2681 3386
## phi_eval_period_normalized:treatmentinterval 0.77 1.00 2801 3404
## phi_eval_period_normalized:treatmentpoint 1.39 1.00 2339 3261
## phi_eval_period_normalized:treatmenttable 0.82 1.00 2847 3386
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Conditional effect of treatment on dv (other predictors at reference
# values), then all conditional effects.
treatment_gg <- conditional_effects(mbeta, "treatment") # conditions = data.frame(size = 1)
treatment_gg
conditional_effects(mbeta) # conditions = data.frame(size = 1)
# Trace plots and marginal posterior densities for convergence inspection.
plot(mbeta)
#get_variables(mbeta)
# Posterior predictive check: observed dv density vs. replicated datasets.
pp_check(mbeta)
#+scale_x_continuous(breaks = seq(-0.5, 0.5, length.out = 7), labels = c("1", "5","10","15","20","25", "30"))
#q75 <- function(y) quantile(y, 0.75)
#pp_check(mbeta, type='stat', stat='mean')
#+scale_x_continuous(breaks = seq(-0.5, 0.5, length.out = 7), labels = c("1", "5","10","15","20","25", "30"))
# Grouped posterior-predictive violins per treatment, and a histogram of
# predictive errors.
bayesplot::pp_check(mbeta, type = "violin_grouped", group = "treatment")
bayesplot::pp_check(mbeta, type = "error_hist")
library(tidybayes)  # already attached above; repeated so this chunk runs standalone
library(modelr)
# Posterior predictions on a 20-point grid of the normalized evaluation
# period, per treatment, including group-level variation for a new
# ("average") participant, summarized as 50/80/95% intervals.
# NOTE(review): add_predicted_samples() and mean_qi(.prob = ) are the older
# tidybayes API (now add_predicted_draws() and .width = ); this script
# targets the older version.
pred_beta =
  rq3 %>%
  data_grid(
    treatment,
    eval_period_normalized = seq_range(eval_period_normalized, n = 20)
  ) %>%
  add_predicted_samples(mbeta, re_formula = NULL, allow_new_levels = TRUE) %>%
  mean_qi(.prob = c(.95, .8, .5))
# Shared ggplot components for the posterior-prediction and fit-line plots:
# relabel the normalized x axis back to years (1..30), drop axis expansion,
# and strip the panel grid / facet strip chrome.
fit_line_plot_settings <- list(
  scale_x_continuous(
    breaks = seq(-0.5, 0.5, length.out = 7),
    labels = c("1", "5", "10", "15", "20", "25", "30")
  ),
  coord_cartesian(expand = FALSE),
  xlab("Evaluation Period"),
  theme(
    panel.grid = element_blank(),
    panel.spacing.x = unit(5, "points"),
    strip.background = element_blank(),
    strip.text = element_text(hjust = 0.5, color = "black"),
    axis.title.x = element_text(hjust = 0)
  )
)
# Facet display order and matching human-readable labels for the treatments.
vis_display_order <- c("point","barchart","interval","table","hopsdist","dotplot","hops","density")
labs_vis <- c("Point + Interval","Bar Chart","Interval","Table","HOP + Strip","Dot plot","HOP","Probability Density")
# NOTE(review): the original `labeller = list(labs_vis)` assignment was
# removed — it was never used and shadowed ggplot2::labeller(), which would
# break the commented-out facet_grid(labeller = labeller(...)) call below.
# Mapping from treatment factor levels to display labels for facet strips.
vis_labels <- c(
  "point"="Point + Interval",
  "barchart"="Bar Chart",
  "interval"="Interval",
  "table"="Table",
  "hopsdist"="HOP + Strip",
  "dotplot"="Dot plot",
  "hops"="HOP",
  "density"="Prob. Density"
)
# Labeller for facet_grid(labeller = vis_labeller). The original used the
# deprecated function(variable, value) labeller interface; as_labeller()
# performs the same name -> label lookup via the supported interface.
vis_labeller <- ggplot2::as_labeller(vis_labels)
#vnames(labs_vis) = vis_display_order
# Posterior predictive plot: grey ribbons are 50/80/95% predictive
# intervals, the red line is the predicted point estimate, and the raw
# observations are overplotted as translucent points. Facets are ordered by
# vis_display_order and relabeled by vis_labeller.
post_pred_plot = pred_beta %>%
  ungroup() %>%
  #mutate(treatment = forcats::fct_relevel(treatment, vis_display_order)) %>%
  ggplot(aes(x = eval_period_normalized)) +
  # `pred` is the summary column produced by mean_qi() above; the interval
  # columns it added drive the ribbon widths.
  geom_lineribbon(aes(y = pred), data = pred_beta) +
  geom_line(aes(y = pred), data = pred_beta, color = "red", size = 1) +
  geom_hline(yintercept = 1) +
  scale_fill_brewer(guide = guide_legend(reverse = TRUE)) +
  # Light horizontal guide lines across each panel.
  geom_hline(yintercept = seq(.3, .95, by = .1), color = "gray75", alpha = 0.5) +
  fit_line_plot_settings +
  #facet_grid(. ~ treatment, labeller = labeller(treatment = labs_vis)) +
  facet_grid(. ~ forcats::fct_relevel(treatment, vis_display_order), labeller = vis_labeller) +
  labs(title = " ", y = "") +
  geom_point(aes(y = dv), alpha = 0.05, data = rq3)
post_pred_plot
Above, the red line is the predicted median, and the blue bands are predictive intervals for the data. We can see that most conditions show a gap between the 1 year and 30 year evaluation periods, which indicates myopic loss aversion. However, there are some differences between conditions, and it is hard to tell how reliable those differences are just by looking at predictions. So, let’s look at posteriors for the mean and precision of estimates according to the model.
First we’ll generate samples of fit lines for conditional means and variances. We will use these to plot fit lines and to generate estimates for performance in the 1 and 30 year evaluation periods. These estimates will be for the “average” scenario and “average” person:
# Fit lines: posterior draws of the conditional mean (mu) and precision
# (phi) on a 20-point grid per treatment. re_formula = NA excludes the
# participant random effects, so these are estimates for the "average"
# person, matching the prose above.
# NOTE(review): add_fitted_samples() is the older tidybayes API
# (now add_epred_draws()).
fit_lines = rq3 %>%
  data_grid(
    treatment,
    eval_period_normalized = seq_range(eval_period_normalized, n = 20)
  ) %>%
  add_fitted_samples(mbeta, re_formula = NA, var = "mu", dpar = TRUE) %>%
  ungroup()
#mutate(vis = fct_relevel(vis, vis_display_order))
The estimates of \(\phi\) (phi, the precision parameter of the beta distribution) are a little hard to interpret, so instead we’ll derive a posterior distribution for standard deviation. We can use the fact that the standard deviation \(\sigma\) of a Beta distribution is:
\[ \sigma = \sqrt{\frac{\mu (1 - \mu)}{(1 + \phi)}} \]
Thus we can transform samples from the distribution of \(\mu\) (mu) and \(\phi\) (phi) into samples from the distribution of \(\sigma\) (sd):
# Transform each (mu, phi) draw into a draw of the Beta distribution's
# standard deviation: sd = sqrt(mu * (1 - mu) / (1 + phi)).
fit_lines <- mutate(fit_lines, sd = sqrt(mu * (1 - mu) / (1 + phi)))

# Posterior mean of mu per treatment and grid point, used as the red
# overlay line in the fit-line plot below.
mu_mean <- fit_lines %>%
  group_by(treatment, eval_period_normalized) %>%
  summarise(mu_mean = mean(mu))
Estimates of the mean for the “average” scenario and “average” person:
# Grey fill ramp for the fit-line ribbons (lightest shade = widest
# interval), with the legend ordered widest-first.
scale_fill_fit_lines <- scale_fill_manual(
  values = RColorBrewer::brewer.pal(4, "Greys")[-1],
  guide = guide_legend(reverse = TRUE)
)

# Posterior fit lines for the conditional mean mu: 50/80/95% ribbons per
# treatment, posterior mean overlaid in red.
mu_lines_plot <- fit_lines %>%
  ggplot(aes(x = eval_period_normalized, y = mu)) +
  stat_lineribbon(.prob = c(.95, .8, .5)) +
  geom_hline(yintercept = seq(.7, 1, by = .05), color = "gray75", alpha = 0.5) +
  geom_line(aes(y = mu_mean), data = mu_mean, color = "red", size = 1) +
  facet_grid(. ~ forcats::fct_relevel(treatment, vis_display_order), labeller = vis_labeller) +
  scale_fill_fit_lines +
  fit_line_plot_settings
mu_lines_plot
# Posterior mean of the derived sd per treatment and grid point, used as
# the red overlay line in the sd fit-line plot below.
sd_mean <- fit_lines %>%
  group_by(treatment, eval_period_normalized) %>%
  summarise(sd_mean = mean(sd))
Estimates of the standard deviation for the “average” scenario and “average” person:
# These settings were a byte-for-byte duplicate of fit_line_plot_settings
# (same x-axis relabeling, coord, xlab, and theme); reuse that object
# instead of maintaining two copies.
fit_line_plot_2 <- fit_line_plot_settings
# Posterior fit lines for the derived standard deviation, same layout as
# the mu plot above; the red line is the posterior mean of sd.
sd_lines_plot = fit_lines %>%
  ggplot(aes(x = eval_period_normalized, y = sd)) +
  stat_lineribbon(.prob = c(.95, .8, .5)) +
  geom_hline(yintercept = seq(0, .2, by=.05), color="gray75", alpha = 0.5) +
  geom_line(aes(y = sd_mean), data = sd_mean, color = "red", size = 1) +
  facet_grid(. ~ forcats::fct_relevel(treatment, vis_display_order), labeller = vis_labeller) +
  scale_fill_fit_lines +
  fit_line_plot_2
sd_lines_plot
# Expected returns per decision option; the best option's expected return
# is used to convert dv scores into dollar outcomes below.
dv_returns <- read_csv("data/dv-returns.csv")
optimal_expreturn <- max(dv_returns$ExpReturn)
#' Dollar value of an investment compounded at the dv-implied return.
#'
#' The achieved annual return is modeled as dv * optimal_expreturn, so
#' dv = 1 corresponds to earning the optimal expected return every year.
#'
#' @param dv numeric score(s); vectorized. Presumably in (0, 1] given the
#'   Beta model above — TODO confirm.
#' @param investment initial dollar amount invested.
#' @param optimal_expreturn expected annual return of the optimal option.
#' @param years investment horizon in years (default 30, matching the
#'   original hard-coded value).
#' @return dollar-formatted string(s) via scales::dollar().
getExpectedInvestment <- function(dv, investment, optimal_expreturn, years = 30){
  # Renamed from `return`, which shadowed the base return() function.
  annual_return <- dv * optimal_expreturn
  scales::dollar(investment * ((1 + annual_return) ^ years))
}
# 35 year old with $50,000 investment for different dv's
# (dv in {0.4, 0.6, 0.8, 1.0}; getExpectedInvestment is vectorized over dv)
dollarValue = getExpectedInvestment(seq(0.4,1,0.2),50000, optimal_expreturn)
Next, we compute conditional estimates at the endpoints of the scale: the 1 year and 30 year evaluation periods.
# Posterior draws of mu and phi at the two endpoints of the evaluation
# period scale (1 year and 30 years), for the "average" person
# (re_formula = NA). Under the normalization above, evalPeriod = 1 maps to
# ((1 - max_eval) / max_eval) + 0.5 (~ -0.4667) and evalPeriod = 30 maps
# to 0.5 — computed from max_eval rather than hard-coding a rounded -0.467.
eval_period_ends <- rq3 %>%
  data_grid(
    treatment,
    eval_period_normalized = c(((1 - max_eval) / max_eval) + 0.5, 0.5)
  ) %>%
  add_fitted_samples(mbeta, re_formula = NA, var = "mu", dpar = TRUE) %>%
  ungroup() %>%
  # Reverse the display order so facet-top treatments appear at the top of
  # the horizontal half-eye plots below.
  mutate(treatment = fct_rev(fct_relevel(treatment, vis_display_order))) %>%
  # Same mu/phi -> sd transformation as for the fit lines.
  mutate(sd = sqrt(mu * (1 - mu) / (1 + phi)))
Conditional means (for “average” person on “average” scenario) on last trial:
# Posterior densities of the conditional mean at the 1- and 30-year
# endpoints, one half-eye pair per treatment. eval_period_normalized == 0.5
# corresponds to the 30-year evaluation period.
plot_means = eval_period_ends %>%
  mutate(evalPeriod = if_else(eval_period_normalized==0.5,"30","1")) %>%
  mutate(evalPeriod = factor(evalPeriod, levels = c("30","1"))) %>%
  ggplot(aes(y = treatment, x = mu, group = evalPeriod, fill = evalPeriod)) +
  # NOTE(review): geom_halfeyeh()/median_qih are the older horizontal
  # tidybayes API (now stat_halfeye()/median_qi).
  geom_halfeyeh(fun.data = median_qih, fatten.point = 0.8) +
  scale_fill_OkabeIto(alpha = 0.5) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "black") +
  cowplot::theme_minimal_vgrid()
#coord_cartesian(xlim = c(0.85, 1.0), ylim = c(1, 10.5))
# Save, then display. The pipe supplies plot_means to ggsave's `plot`
# argument since `filename` is passed by name.
plot_means %>%
  ggsave(filename = "img/rq/rq4-mu.png",
    height = 5,
    width = 6)
plot_means
Mean differences by evaluation periods
# Posterior distribution of the within-treatment difference in mu between
# the 30-year and 1-year endpoints, matched per posterior draw via
# .iteration; positive values mean higher dv at 30 years.
mean_diff = eval_period_ends %>%
  mutate(evalPeriod = if_else(eval_period_normalized==0.5,"ep30","ep1")) %>%
  mutate(evalPeriod = factor(evalPeriod, levels = c("ep30","ep1"))) %>%
  mutate(evalPeriod = fct_relevel(evalPeriod, "ep1")) %>%
  select(treatment, mu, evalPeriod, .iteration) %>%
  # One column per endpoint so the per-draw difference can be computed.
  pivot_wider(
    names_from = evalPeriod,
    values_from = mu
  ) %>%
  mutate(mu_diff = ep30 - ep1) %>%
  ggplot(aes(y = treatment, x = mu_diff)) +
  geom_halfeyeh(fun.data = median_qih, fatten.point = 0.8) +
  # NOTE(review): no fill aesthetic is mapped in this plot, so this fill
  # scale has no visible effect.
  scale_fill_OkabeIto(alpha = 0.5) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  cowplot::theme_minimal_vgrid() +
  xlim(-.25,.25)
#coord_cartesian(xlim = c(0.85, 1.0), ylim = c(1, 10.5))
mean_diff
Conditional standard deviation (for “average” person on “average” scenario) on last trial:
# Posterior densities of the derived standard deviation at the 1- and
# 30-year endpoints, one half-eye pair per treatment (same layout as
# plot_means above).
plot_sd = eval_period_ends %>%
  mutate(evalPeriod = if_else(eval_period_normalized==0.5,"30","1")) %>%
  mutate(evalPeriod = factor(evalPeriod, levels = c("30","1"))) %>%
  ggplot(aes(y = treatment, x = sd, group = evalPeriod, fill = evalPeriod)) +
  geom_halfeyeh(fun.data = median_qih, fatten.point = 0.8) +
  scale_fill_OkabeIto(alpha = 0.5) +
  geom_vline(xintercept = 0, color = "darkgrey") +
  cowplot::theme_minimal_vgrid()
#coord_cartesian(xlim = c(0.85, 1.0), ylim = c(1, 10.5))
# Save, then display (the pipe fills ggsave's `plot` argument).
plot_sd %>%
  ggsave(filename = "img/rq/rq4-std.png",
    height = 5,
    width = 6)
plot_sd